#
# Fig1-GSS.R.
#
# Analysis of happiness in the
# General Social Survey.
#
# Jeffrey Tu, j4tu@ucsd.edu
#

# rm(list = ls())
# setwd("SET WD HERE")

# Libraries
library(dplyr)
library(ggplot2)

# Data source switch: change the condition to TRUE to rebuild 'GSSData.csv'
# from the gssr package; as written (FALSE) the script reads the CSV instead.
# Either branch leaves the year/happy data in `full_df`.
if (FALSE) { # Create data using gssr package
  ## Data from gssr package.
  # Install 'gssr' from the kjhealy r-universe
  # install.packages('gssr', repos = c('https://kjhealy.r-universe.dev', 'https://cloud.r-project.org'))
  library(gssr)

  # Load all GSS Cross Sectional Data
  data("gss_all")

  # Select Year and Happiness
  # Variable coding is explained in the codebook here:
  # https://gss.norc.org/content/dam/gss/get-documentation/pdf/codebook/GSS%202022%20Codebook.pdf
  full_df <- select(as.data.frame(gss_all), year, happy)

  # Save the filtered DF for the archive
  write.csv(full_df, "GSSData.csv")
} else {
  # 'GSSData.csv' distributed with replication archive.
  full_df <- read.csv("GSSData.csv")
}

# Reverse the happiness scale (1 <-> 3, 2 stays 2) so that larger values
# indicate greater happiness. Values matching none of the three codes
# fall through case_when() and become NA.
full_df <- full_df %>%
  mutate(
    happy = case_when(
      happy == 3 ~ 1,
      happy == 2 ~ 2,
      happy == 1 ~ 3
    )
  )

# Average happiness per survey year, keeping only 1972 through 2018.
# Missing happiness values are ignored when averaging.
aggregated_data <- full_df %>%
  filter(year >= 1972, year <= 2018) %>%
  group_by(year) %>%
  summarise(
    mean_happy = mean(happy, na.rm = TRUE),
    .groups = "drop"
  )

# Plot the time series of yearly mean happiness with a linear trend line.
plot <- ggplot(aggregated_data, aes(x = year, y = mean_happy)) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1) +
  # Semi-transparent blue OLS trend; the formula is spelled out so ggplot2
  # does not print its "using formula" message.
  geom_smooth(
    method = "lm",
    formula = y ~ x,
    se = FALSE,
    color = scales::alpha("blue", 0.5)
  ) +
  # No colour label: no layer maps a colour aesthetic, so there is no legend.
  labs(
    x = "Year",
    y = "Average \nHappiness \nIndex"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    # Wide right margin (105pt) versus 11pt on the other three sides.
    plot.margin = margin(11, 105, 11, 11, "pt"),
    # Horizontal, vertically centred y-axis title.
    axis.title.y = element_text(angle = 0, vjust = 0.5)
  )

# Write the figure to disk as a 10 x 6 inch PDF
ggsave(
  filename = "Figure_1.pdf",
  plot = plot,
  width = 10,
  height = 6,
  units = "in"
)
